--- title: '[RC17]DE Analysis of Oligodendroglia' author: "Nina-Lydia Kazakou" date: "15/03/2022" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Set-up ### Load libraries ```{r message=FALSE, warning=FALSE} library(SingleCellExperiment) library(Seurat) library(tidyverse) library(Matrix) library(scales) library(cowplot) library(RCurl) library(scDblFinder) library(Matrix) library(ggplot2) library(edgeR) library(dplyr) library(ggsci) library(here) library(gtools) library(viridis) ``` ### Colour Palette ```{r load_palette} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3<-pal_lancet("lanonc", alpha = 0.7)(9) mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4) ``` ### Load object ```{r} oligos <- readRDS(here("Data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds")) ``` ```{r} DimPlot(oligos, group.by = "ClusterID", pt.size = 0.9, label = TRUE, cols = mycoloursP[6:40]) & NoLegend() ``` # Differential Gene Expression Analysis I will perform differential gene expression testing within Seurat. Seurat's default is a Wilcoxon rank sum test. However, based on previous assessments of the differential gene expression methods within our lab, as well as literature (ref), we decided that the "MAST" method fits data better. MAST is a GLM-framework that treates cellular detection rate as a covariate that can also work within Seurat, but the MAST package should be loaded. MAST uses a two-part, generalized linear model for bimodal data that parameterizes both strongly non-zero or non-detectable features; the fraction of genes expressed in a cell, should be adjusted for as a source of nuisance variation. More details about MAST can be found here: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0844-5 ```{r} dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis")) dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers")) ``` ## All Markers ```{r All_Genes, eval=FALSE} Idents(oligos) <- "ClusterID" oligos_markers <- FindAllMarkers(oligos, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, term.use = "MAST") write.csv(oligos_markers, here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_All_Markers_ClusterID.csv")) ``` ```{r} oligos_markers <- read.csv(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_All_Markers_ClusterID.csv")) ``` ```{r Plot_top10_All_Genes} oligos_markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC) top10_oligos_markers <- oligos_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) ``` ```{r Cluster_Averages} cluster_averages <- AverageExpression(oligos, group.by = "ClusterID", return.seurat = TRUE) cluster_averages@meta.data$cluster <- factor(levels(oligos@meta.data$ClusterID)) saveRDS(cluster_averages, here("data", "Processed", "Oligodendroglia", "OL_AvgClu.rds")) ``` ```{r eval=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots")) ``` ```{r fig.height=15, fig.width=15, message=FALSE, warning=FALSE} DoHeatmap(object = cluster_averages, features = top10_oligos_markers$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) ``` ```{r eval=FALSE} plot1 <- DoHeatmap(object = cluster_averages, features = top10_oligos_markers$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 2.5) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Plot_top10_All_Genes.pdf"), paper="a4", width=10, height=16) print(plot1) dev.off ``` ```{r Filter_Genes} oligos_markers <- read.csv(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_All_Markers_ClusterID.csv")) fil_pct_1 = 0.25 fil_pct_2 = 0.1 oligos_fil_markers <- subset(oligos_markers, oligos_markers$pct.1 > fil_pct_1 & oligos_markers$pct.2 < fil_pct_2 ) write.csv(oligos_fil_markers, here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_Fil_Markers_ClusterID.csv")) ``` ```{r Plot_top10_Filter_Genes} oligos_fil_markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC) top10_oligos_fil_markers <- oligos_fil_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) ``` ```{r fig.height=15, fig.width=15, message=FALSE, warning=FALSE} DoHeatmap(object = cluster_averages, features = top10_oligos_fil_markers$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) ``` ```{r eval=FALSE} plot2 <- DoHeatmap(object = cluster_averages, features = top10_oligos_fil_markers$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Plot_top10_fil_Genes.pdf"), paper="a4", width=10, height=16) print(plot2) dev.off ``` ## Pairwise Marker Comparisons ```{r Pairwise_All_Cells, eval=FALSE} clust_id_list <- list(list("CyP_1","Cyp_2"), list("Cyp_2", "CyP_1"), list("OAPC", "SPARCL1+ OLIGO_3"), list("SPARCL1+ OLIGO_3", "OAPC"), list("OLIGO_1", "OLIGO_2"), list("OLIGO_2", "OLIGO_1"), list("OLIGO_1", "COP"), list("COP", "OLIGO_1"), list("pri-OPC", "OPC_1"), list("OPC_1", "pri-OPC"), list("COP", "OPC_2"), list("OPC_2", "COP"), list("COP", "OPC_1"), list("OPC_1", "COP"), list("OPC_1", "OPC_2"), list("OPC_2", "OPC_1"), list("OPC_1", "OPC_3"), list("OPC_3", "OPC_1"), list("OPC_3", "OPC_2"), list("OPC_2", "OPC_3")) pairwise_mark <- function(seur_obj, int_cols, clust_id_list, only_pos = TRUE, min_pct = 0.25, logfc_threshold = 0.25, fil_pct_1 = 0.25, fil_pct_2 = 0.1, save_dir = getwd(), test_use = "MAST"){ for(k in 1:length(int_cols)){ Idents(seur_obj) <- int_cols[k] for(i in 1: length(clust_id_list)){ clust_mark <- FindMarkers(seur_obj, ident.1 = clust_id_list[[i]][[1]], ident.2 = clust_id_list[[i]][[2]], min.pct = min_pct, test.use = test_use) clust_mark$clust <- clust_id_list[[i]][[1]] clust_mark$comp_to_clust <- clust_id_list[[i]][[2]] write.csv(clust_mark, paste(save_dir, int_cols[k], "_", clust_id_list[[i]][[1]], "_", clust_id_list[[i]][[2]], ".csv", sep = "" )) } } } dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Pairwise_Comaparisons")) pairwise_mark(oligos, int_cols = "ClusterID", save_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Pairwise_Comaparisons"), clust_id_list = clust_id_list) ``` ```{r Pairwise_Filter_Genes} gen_mark_list <-function(file_dir = getwd(), avg_log = 1.2, pct_1 = 0.25, pct_2 = 0.5, pairwise = FALSE ){ temp = list.files(path = file_dir, pattern="*.csv") myfiles = lapply(paste(file_dir, temp, sep = "/"), read.csv) for(i in 1:length(myfiles)){ dat <- myfiles[[i]] av_log_fil <- subset(dat, dat$avg_log2FC > avg_log & dat$pct.1 > pct_1 & dat$pct.2 < pct_2) if(pairwise == TRUE){ top10 <- av_log_fil %>% top_n(10, avg_log2FC) top10$gene <- top10$X }else{ av_log_fil$cluster <- as.character(av_log_fil$cluster) top10 <- av_log_fil %>% group_by(cluster) %>% top_n(10, avg_log2FC) } if(i ==1){ fil_genes <- top10 }else{ fil_genes <- rbind(fil_genes, top10) } fil_genes <- fil_genes[!duplicated(fil_genes$gene),] } return(fil_genes) } oligos_fil_pw <- gen_mark_list(file_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Pairwise_Comaparisons"), pairwise = TRUE) oligos_fil_markers <- read.csv(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_Fil_Markers_ClusterID.csv")) int_genes <- c(oligos_fil_markers$gene, oligos_fil_pw$gene) int_genes <- unique(int_genes) length(int_genes) ``` ```{r Plot_top100_int_genes, fig.height=100, fig.width=15, message=FALSE, warning=FALSE} DoHeatmap(object = cluster_averages, features = int_genes, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) ``` ```{r eval=FALSE} plot3 <- DoHeatmap(object = cluster_averages, features = int_genes, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Intersction_Genes_Fil_PW.pdf"), paper="a4", width=10, height=16) print(plot3) dev.off ``` #### Save VlnPlots & FeaturePlots from all int_genes to identify potentially interesting ones: ```{r VlnPlots, eval=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_VlnPlots")) save_vln_plots <- function(seur_obj, marker_list, save_dir = getwd(), numb_genes = 10, plotheight = 20, plotwidth = 8, group_by = Idents(seur_obj), pt_size = 0.1, n_col = 1){ gene_groups <- split(marker_list, ceiling(seq_along(marker_list) / numb_genes)) for(i in 1:length(gene_groups)){ pdf(paste(save_dir, "/clust_marker_vln_", i, ".pdf", sep=""), height=plotheight, width = plotwidth) finalPlot<-VlnPlot(seur_obj, features = unlist(gene_groups[i]), group.by = group_by, pt.size= pt_size, ncol=1) print(finalPlot) graphics.off() } } save_vln_plots(seur_obj= oligos, marker_list = int_genes, save_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_VlnPlots"), group_by = "ClusterID", pt_size = 0.1, plotheight = 30) ``` ```{r FeautrePlots, eval=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_FeaturePlots")) save_feat_plots <- function(seur_obj, marker_list, save_dir = getwd(), numb_genes = 16, plotheight = 18, plotwidth = 20, split_by = "NULL", n_col = 4){ gene_groups <- split(marker_list, ceiling(seq_along(marker_list) / numb_genes)) for(i in 1:length(gene_groups)){ if(split_by != "NULL"){ feature_plot<-FeaturePlot(seur_obj, features = unlist(gene_groups[i]), ncol = n_col, split.by = split_by) }else{ feature_plot<-FeaturePlot(seur_obj, features = unlist(gene_groups[i]), ncol = n_col) } pdf(paste(save_dir, "/clust_marker_feat_", i, ".pdf", sep=""), height=plotheight, width = plotwidth) print(feature_plot) graphics.off() } } save_feat_plots(seur_obj= oligos, marker_list = int_genes, save_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_VlnPlots")) ``` ### Some more plots: ```{r} annotation_genes <- c("OLIG2", "SOX10", "PDGFRA", "PCDH15", "MBP", "MOG", "HOPX", "SPARCL1", "EGFR", "DLL1", "NKX2-2", "MYC", "MKI67", "TOP2A") ``` ```{r fig.height=8, fig.width=12, message=FALSE, warning=FALSE} DoHeatmap(object = cluster_averages, features = annotation_genes, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) ``` ```{r eval=FALSE} plot4 <-DoHeatmap(object = cluster_averages, features = annotation_genes, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Annotation_Genes.pdf"), paper="a4", width=10, height=16) print(plot4) dev.off ``` ```{r fig.width=9, fig.height= 5} DotPlot(oligos, features = annotation_genes, cols = c("blue", "red")) + FontSize(7) ``` ```{r eval=FALSE} plot5 <- DotPlot(oligos, features = annotation_genes, cols = c("blue", "red")) + FontSize(5) pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Annotation_Genes_DotPlot.pdf"), paper="a4", width=10, height=4) print(plot5) dev.off ``` ```{r} sessionInfo() ```